Dimension reduction of the latent space

From a latent space to musical parameters
dimension reduction
auto-encoder
swarm
Author

Max Worgan

Published

February 28, 2022

Introduction

In the first part I reimplemented the convolutional auto encoder from TimeCluster by Ali et al. The the second part, I adapted the model to handle all 300 flock agents.

In this notebook I evaluate serveral processes to turn the latent space representation into parameters for musical control

function normalise(M) 
    min = minimum(minimum(eachcol(M)))
    max = maximum(maximum(eachcol(M)))
    return (M .- min) ./ (max - min)
end

normalised = Array(df) |> normalise

window_size = 60

data = slidingwindow(normalised',window_size,stride=1)

train, validate, test = splitobs(shuffleobs(data), (0.7,0.2));
function create_ae_1d()
  # Define the encoder and decoder networks 
  encoder = Chain(
  # 60x900xb
  Conv((9,), 900 => 9000, relu; pad = SamePad()),
  MaxPool((2,)),
  # 30x9000xb
  Conv((5,), 9000 => 4500, relu; pad = SamePad()),
  MaxPool((2,)),
  # 15x4500xb
  Conv((5,),4500 => 2250, relu; pad = SamePad()),
  # 15x2250xb
  MaxPool((3,)),
  Conv((3,),2250 => 1000, relu; pad = SamePad()),
  Conv((3,),1000 => 100, relu; pad = SamePad()),
  # 5x100xb
  Flux.flatten,
  Dense(500,100)
)
decoder = Chain(
  Dense(100,500),
  (x -> reshape(x, 5,100,:)),
  # 5x100xb
  ConvTranspose((3,), 100  => 1000, relu; pad = SamePad()),
  ConvTranspose((3,), 1000 => 2250, relu; pad = SamePad()),
  Upsample((3,)),
  # 15x2250xb
  ConvTranspose((5,), 2250 => 4500, relu; pad = SamePad()),
  Upsample((2,)),
  # 30x4500xb
  ConvTranspose((5,), 4500 => 9000, relu; pad = SamePad()),
  Upsample((2,)),
  # 60x9000xb
  ConvTranspose((9,), 9000 => 900, relu; pad = SamePad()),
  # 60x900xb
)
return (encoder, decoder)
end
create_ae_1d (generic function with 1 method)

Load saved model

We load the saved model that we trained in the previous step

encoder, decoder  = create_ae_1d()
new_model         = Chain(encoder, decoder)
model_row         = read_model_row("1d_300_model-112-8.0e-6.arrow")
load_weights!(new_model, model_row.weights)
@userplot FlockPlot
@recipe function f(cp::FlockPlot)
    x,y,z = cp.args
    color := :black
    label --> false
    seriestype := :scatter
    alpha := 0.75
    markersize := 3
    xlim := (0, 1)
    ylim := (0, 1)
    zlim := (0, 1)
    x, y, z
end

function create_gif_of_data_latent_space_and_reduced(data, encoded, components, title)
    s = size(components,1)
    t = reshape(getobs(data), 3, 300, s)
    t = permutedims(t, [2,3,1])
    anim = @animate for i  1:s
        A = flockplot(t[:,i,1], t[:,i,2], t[:,i,3],  title = "Input Swarm")
        B = Plots.bar(components[i], title=title, label=""; ylim=(-35,35))
        C = Plots.heatmap(reshape(encoded[i], (10,10)),colorbar=false, color=:autumn, title="Encoded Representation", axis=([], false))
        Plots.plot(A,C,B;layout=(1,3),size = (1100, 400))
    end
    gif(anim, "anim_fps30.gif", fps=30)
end
    
create_gif_of_data_latent_space_and_PCA (generic function with 1 method)

Latent space to parameters

Now that we can encode the movement of the whole swarm over 60 timesteps into 100 variables, I want to reduce that encoding even further into ~10 parameters that can be used to sonify the dynamics.

Since we’ve already used a non linear dimension reduction on our data, lets evaluate PCA, ISOMAP, UMAP to reduce the latent space further.

# Gather all our encoded data into training and test matricies

train_encoded = Matrix{Float32}[]
for t  train
    i      = Flux.unsqueeze(t', 3)
    output = hcat(new_model[1](i)...)
    push!(train_encoded, output)
end

test_encoded = Matrix{Float32}[]
for t  test
    i      = Flux.unsqueeze(t', 3)
    output = hcat(new_model[1](i)...)
    push!(test_encoded, output)
end

PCA

M = fit(PCA, vcat(train_encoded...)'; maxoutdim=10)
PCA(indim = 100, outdim = 7, principalratio = 0.9925308)
varience_preserved = principalratio(M)
0.9925308f0
contributions_of_components = (principalvars(M) ./ tvar(M) * 100)
7-element Vector{Float32}:
 43.02717
 37.324966
 15.405006
  1.8183522
  0.8683387
  0.46416324
  0.34509116
# grab a few consecutive windows of data, encode and plot the data, the latent space and the principal components
components = Matrix{Float32}[]
encoded = Matrix{Float32}[]

start_index = 101

d = hcat(data[start_index],data[start_index+window_size], data[start_index+(window_size*2)], data[start_index+(window_size*3)])
for i  0:239
    t = data[start_index + i]
    encoded_t = new_model[1](Flux.unsqueeze(t',3))
    push!(encoded, encoded_t)
    push!(components,MultivariateStats.transform(M, encoded_t))
end
create_gif_of_data_latent_space_and_reduced(d, encoded,components, "Principal components")
┌ Info: Saved animation to 
│   fn = /notebooks/anim_fps30.gif
└ @ Plots /opt/julia/packages/Plots/LI4FE/src/animation.jl:114

Notes

Since our method is windowed, a particular encoded representation and a particular set of PCA values actually represents a full window (60 frames / 2 seconds). In order to make the above, I calculated the encoding and the PCA projection for each window, using a step size of one (as was used in the training). So in fact the above is somewhat out of sync, since the encoded and PCA projections are 1 second in front of the raw data, and in fact referencing some data that isn’t visable in the Input Swarm visualisation.

ISOMAP

The implementation of ISOMAP found in ManifoldLearning.jl is unusual for manifold learning algorithms because we can transform out of sample data into an existing model.

M = fit(Isomap, (vcat(train_encoded...) |> f64)')
Isomap{BruteForce}(indim = 100, outdim = 2, neighbors = 12)
Y = ManifoldLearning.transform(M)
2×1749 Matrix{Float64}:
 35.7851    -186.37      206.575     …  -60.2106    -72.1667     -37.1274
 -0.134194     0.145393    0.175012      -0.123771   -0.0541171   -0.130726
Plots.scatter(T[1,:],T[2,:];label="", title="2D manifold of encoded space by ISOMAP")

# Grab some data form the encoded test set
e = rand(test_encoded)' |> f64;
# Transform the out-of-sample test data into the existing ISOMAP model
@time new_data = predict(M,e);
  0.048351 seconds (15.30 k allocations: 25.509 MiB, 66.18% gc time)
# Overlay the new data onto the existing 2D model
Plots.scatter!([new_data[1]], [new_data[2]], label="new data")

I’m interested in reducing to more than 2 dimensions so lets try with 7 in line with the PCA example above

M7 = fit(Isomap, (vcat(train_encoded...) |> f64)';maxoutdim=7)
Isomap{BruteForce}(indim = 100, outdim = 7, neighbors = 12)
isomap = Matrix{Float32}[]
encoded = Matrix{Float32}[]

start_index = 101

d = hcat(data[start_index],data[start_index+window_size], data[start_index+(window_size*2)], data[start_index+(window_size*3)])
for i  0:239
    t = data[start_index + i]
    encoded_t = new_model[1](Flux.unsqueeze(t',3)) |> f64
    push!(encoded, encoded_t)
    push!(isomap,predict(M7, encoded_t))
end
create_gif_of_data_latent_space_and_reduced(d, encoded,components, "ISOMAP representation")
┌ Info: Saved animation to 
│   fn = /notebooks/anim_fps30.gif
└ @ Plots /opt/julia/packages/Plots/LI4FE/src/animation.jl:114

UMAP

# use UMAP to do additional encoding on the latent space
# unfortunatly there is no easy way to do out-of-sample transformation

res_jl = umap(vcat(train_encoded...); n_neighbors=5, min_dist=0.001, n_epochs=100)
Plots.scatter(res_jl[1,:], res_jl[2,:], title="UAMP encoding", marker=(2, 2, :auto), label="")

Interesting to see some clusters emerge in the 2D representation. Could this be used for behaviour classification? Without out-of-sample data prediction, it’s not going to be useful in the context I want to use it.